rm(list =ls())
options(scipen=999)
gc()
packages <-c("tidyverse","estimatr","plm","stargazer","fastDummies","readstata13","ihs","AER","remotes")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
remotes::install_github("ChandlerLutz/starpolishr")

rm(packages, new.packages)

setwd(dirname(dirname(rstudioapi::getSourceEditorContext()$path)))


## insheet data
data <- read.csv("./Datasets/panel_bare_bones.csv")

## clean data
data$X <- NULL
data$locality_year <- NULL
data$subbotnik_inkind <- NULL
data$locality <- as.character(tolower(data$locality))

controls <- c("locality", "region", "pop", "schools", "shops", "restaurants", "cinemas",
              "Frac48", "coal_sqm", "coal_mines", "minerals", "partition", "priests_continuous", "priests_relocated",
              "pop", "protests_40s", "sabotage_40s","terror_40s", "income_76_capita", 
              "income_77_capita")

## load controls
data <- merge(data, read.csv("./Datasets/main_confounders.csv")[,controls], by = c("locality"), all.x = T)
data$region_numeric <- as.numeric(as.factor(data$region))
rm(controls)
data$priests_continuous <- data$priests_relocated

## add Prussian census controls
data <- merge(data, read.csv("./Datasets/prussia_clean.csv"), by = c("locality"), all.x = T)


################################
### REGRESSIONS FOR SABOTAGE ###
################################

#subset to years with observations:
data_t <- data[which(data$year<1980),]

# turn subbotnik upside down so that it accurately capture sabotage
data_t$subbotnik_inkind_ <- data_t$subbotnik_inkind_*-1

# create wealth index
data_t$shops <- scale(data_t$shops)
data_t$restaurants <- scale(data_t$restaurants)
data_t$cinemas <- scale(data_t$cinemas)
data_t$wealth_index <- rowSums(data_t[,c("shops","restaurants","cinemas")], na.rm = T)
head(data_t[,c("shops", "restaurants", "cinemas", "wealth_index")], 50)
data_t$wealth_index[data_t$wealth_index == "NaN"] = NA  
data_t$wealth_index <- scale(data_t$wealth_index)

# create state capacity index
data_t$state_capacity_index <- scale(data_t$schools)

# create ethnic diversity index
data_t$ethnic_diversity_index <- scale(data_t$Frac48)

# create Russian occupation index
data_t$Russian_occupation_index <- ifelse(data_t$partition=="Russian",1,0)
data_t$Russian_occupation_index <- scale(data_t$Russian_occupation_index)

# scale industrialization
data_t$industrialization_index <- scale(data_t$minerals)

# scale original industrialization
data_t$coal_sqm <- scale(data_t$coal_sqm)
data_t$minerals <- scale(data_t$minerals)
data_t$industrialization_index_o <- rowSums(data_t[,c("coal_sqm","minerals")], na.rm = F)
data_t$industrialization_index_o[data_t$industrialization_index_o == "NaN"] = NA  
data_t$industrialization_index_o <- scale(data_t$industrialization_index_o)

# scale grievance
data_t$grievance <- scale(data_t$coal_sqm)

# scale grievance II
data_t$grievance_ii <- scale(data_t$coal_mines)

# scale protests 40s
data_t$protests_40s <- scale(data_t$protests_40s)

# scale sabotage 40s
data_t$sabotage_40s <- scale(data_t$sabotage_40s)

# scale terror 40s
data_t$terror_40s <- scale(data_t$terror_40s)

# scale incomes
data_t$income_76_capita <- scale(data_t$income_76_capita)
data_t$income_77_capita <- scale(data_t$income_77_capita)
data_t$income <- rowSums(data_t[,c("income_76_capita", "income_77_capita")], na.rm = T)
data_t$income <- scale(data_t$income)

# scale jews
data_t$jew_perc[is.na(data_t$jew_perc)] = mean(data_t$jew_perc, na.rm=TRUE)
data_t$jew_perc <- scale(data_t$jew_perc)


# average over years
data_agg <- aggregate(data_t[,c("locality_numeric", "subbotnik_inkind_", "commanders", 
                                "priests_continuous", "pop", "region_numeric", "wealth_index", 
                                "state_capacity_index", "ethnic_diversity_index", "Russian_occupation_index", 
                                "industrialization_index", "industrialization_index_o", "income", "grievance", "grievance_ii", "protests_40s",
                                "sabotage_40s", "terror_40s", "jew_perc")], by=list(Category=data_t$locality_numeric), FUN=mean, na.rm=T)





# regressions
no_ctrl = ivreg(scale(subbotnik_inkind_) ~ commanders | priests_continuous , data = data_agg)
o_ctrl = ivreg(scale(subbotnik_inkind_) ~ commanders | priests_continuous + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index_o + factor(region_numeric), data = data_agg)
o_ctrl_2u = ivreg(scale(subbotnik_inkind_) ~ commanders + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index_o + factor(region_numeric) | priests_continuous + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index_o + factor(region_numeric), data = data_agg)
n_ctrl_2u = ivreg(scale(subbotnik_inkind_) ~ commanders + pop + wealth_index + income + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance + grievance_ii + protests_40s + sabotage_40s + terror_40s + jew_perc + factor(region_numeric)  | priests_continuous + pop + wealth_index + income + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance + grievance_ii + protests_40s + sabotage_40s + terror_40s + jew_perc + factor(region_numeric) , data = data_agg)

# combine to list
sabotage_models <- list(no_ctrl, o_ctrl, o_ctrl_2u, n_ctrl_2u)

# full output including controls
stargazer(no_ctrl, o_ctrl, o_ctrl_2u, n_ctrl_2u,
          type="text",
          dep.var.labels = c("Sabotage"),
          covariate.labels = c("Surveillance","Population","Wealth","Income",
                               "State capacity","Ethnic diversity","Russian occupation",
                               "Industrialization (old)","Industrialization (new)",
                               "Grievances (coal)","Grievances (mines)","Protests (40s)",
                               "Sabotage (40s)","Terror (40s)","Jews (1871)"),
          style = "qje",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          omit = c("Constant|region"),
          omit.stat = c("rsq", "ser"), 
          omit.table.layout = "n",
          add.lines = list(c("Controls", "No", "Yes (old)", "Yes (old)", "Yes (new)"),
                           c("Both stages", "NA", "No", "Yes", "Yes"),
                           c("FEs",      "No", "Yes"      , "Yes"      , "Yes"  )),
          out = "./output/TableA13_CORRECTED_sabotage.tex",
          float=F)



###############################
### REGRESSIONS FOR STRIKES ###
###############################

# subset to years with observations:
data_t_late <- data[which(data$year>1979),]

# average across years
data_agg_late <- aggregate(data_t_late[,c("locality_numeric", "strikes", "commanders", "priests_continuous", "region_numeric")], by=list(Category=data_t_late$locality_numeric), FUN=mean, na.rm=T)

# attach covariates from above
data_agg_late <- merge(data_agg_late, data_agg[,c("locality_numeric", "pop", "wealth_index", 
                                                  "state_capacity_index", "ethnic_diversity_index", "Russian_occupation_index", 
                                                  "industrialization_index", "industrialization_index_o", "income", "grievance", "grievance_ii", "protests_40s",
                                                  "sabotage_40s", "terror_40s", "jew_perc")], by = "locality_numeric", all.x = T)


# regressions
no_ctrl = ivreg(scale(strikes) ~ commanders | priests_continuous , data = data_agg_late)
o_ctrl = ivreg(scale(strikes) ~ commanders | priests_continuous + pop + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index_o + factor(region_numeric), data = data_agg_late)
o_ctrl_2u = ivreg(scale(strikes) ~ commanders + pop + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index_o + factor(region_numeric) | priests_continuous + pop + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index_o + factor(region_numeric), data = data_agg_late)
n_ctrl_2u = ivreg(scale(strikes) ~ commanders + pop + wealth_index + income + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance + grievance_ii + protests_40s + sabotage_40s + terror_40s + jew_perc + factor(region_numeric)  | priests_continuous + pop + wealth_index + income + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance + grievance_ii + protests_40s + sabotage_40s + terror_40s + jew_perc + factor(region_numeric) , data = data_agg_late)

# combine to list
protest_models <- list(no_ctrl, o_ctrl, o_ctrl_2u, n_ctrl_2u)

# full output including controls
stargazer(no_ctrl, o_ctrl, o_ctrl_2u, n_ctrl_2u,
          type="text",
          dep.var.labels = c("Protests"),
          covariate.labels = c("Surveillance","Population","Wealth","Income",
                               "State capacity","Ethnic diversity","Russian occupation",
                               "Industrialization (old)","Industrialization (new)",
                               "Grievances (coal)","Grievances (mines)","Protests (40s)",
                               "Sabotage (40s)","Terror (40s)","Jews (1871)"),
          style = "qje",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          omit = c("Constant|region"),
          omit.stat = c("rsq", "ser"), 
          omit.table.layout = "n",
          add.lines = list(c("Controls", "No", "Yes (old)", "Yes (old)", "Yes (new)"),
                           c("Both stages", "NA", "No", "Yes", "Yes"),
                           c("FEs",      "No", "Yes"      , "Yes"      , "Yes"  )),
          out = "./output/TableA13_CORRECTED_protests.tex",
          float=F)


# combined output
stargazer(protest_models, sabotage_models,
          type="text",
          dep.var.labels = c("Protests", "Sabotage"),
          style = "qje",
          covariate.labels = c("Commanders","Population","Wealth","Income",
                               "State capacity","Ethnic diversity","Russian occupation",
                               "Industrialization (old)","Industrialization (new)",
                               "Grievances (coal)","Grievances (mines)","Protests (40s)",
                               "Sabotage (40s)","Terror (40s)","Jews (1871)"),
          star.char = c("*", "**", "***"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          omit = c("Constant", "factor"),
          omit.stat = c("rsq", "ser"), 
          omit.table.layout = "n",
          add.lines = list(c("Controls", "No", "Yes (old)", "Yes (old)", "Yes (new)", "No", "Yes (old)", "Yes (old)", "Yes (new)"),
                           c("Both stages", "NA", "No", "Yes", "Yes", "NA", "No", "Yes", "Yes"),
                           c("FEs",      "No", "Yes"      , "Yes"      , "Yes", "No", "Yes"      , "Yes"      , "Yes"  )),
          out = "./output/TableA13_CORRECTED.tex",
          float=FALSE)
